******************************************************************************************
* Do-file name:	cr_pseudo_panel_ado.do  
* Task:         ado file to create samples for pseudo-panel regressions  
* Last change:  09.02.2025    
******************************************************************************************


******************************************************************************************
*** collapse data on municipality level by 2 sex, 3 education & 3 age groups: wage sample
******************************************************************************************

*** restrict the sample to observations with a non-missing y_nat
drop if y_nat == .

*** helper variables used to check the collapse
*** (built here so that they do not take up space in cr_region_data -- presumably cr_region_data.do)
** native employment by 2 sex, 3 education and 3 age groups
// age-group bounds: the a-th entry of each list belongs to age group a (16-29, 30-49, 50-65)
local age_lo 16 30 50
local age_hi 29 49 65

// loop order (sex, then education, then age) keeps the variables in the dataset
// in the order sex0_edu31_age31 ... sex1_edu33_age33
forvalues s = 0/1 {
	forvalues e = 1/3 {
		forvalues a = 1/3 {
			local lo : word `a' of `age_lo'
			local hi : word `a' of `age_hi'
			// 1 when nation_gr == 1, status == 1 and the observation falls in this
			// sex / education / age cell; missing otherwise
			gen emp_nat_sex`s'_edu3`e'_age3`a' = 1  if nation_gr == 1 & female == `s' & imp_edu == `e' & inrange(age, `lo', `hi') & status == 1
		}
	}
}

** native wage by 2 sex, 3 education and 3 age groups
// age-group bounds: the a-th entry of each list belongs to age group a (16-29, 30-49, 50-65)
local age_lo 16 30 50
local age_hi 29 49 65

// loop order (sex, then education, then age) keeps the variables in the dataset
// in the order sex0_edu31_age31 ... sex1_edu33_age33
forvalues s = 0/1 {
	forvalues e = 1/3 {
		forvalues a = 1/3 {
			local lo : word `a' of `age_lo'
			local hi : word `a' of `age_hi'
			// copy of impy when nation_gr == 1 and the observation falls in this
			// sex / education / age cell; missing otherwise
			gen y_nat_sex`s'_edu3`e'_age3`a' = impy  if nation_gr == 1 & female == `s' & imp_edu == `e' & inrange(age, `lo', `hi')
		}
	}
}

*** collapse to one row per municipality x year x sex x education x age group
// variables aggregated as (weighted) means
local mean_vars ao_kreis_imp weight_matching border_imp border_imp_13 control_imp ost distance ktyp y_nat y_nat_sex?_edu3?_age3?
// variables aggregated as (weighted) sums
local sum_vars native emp_nat emp_nat_sex?_edu3?_age3?
// identifiers of a cell
local cell_ids ao_gem_imp year female imp_edu age_3

// weight_fte enters as an importance weight for both the means and the sums
collapse (mean) `mean_vars' (sum) `sum_vars' [iw=weight_fte], by(`cell_ids')

*** remove the cells formed by observations with missing education
drop if imp_edu == .

*** correct distance variable
// give every row of a municipality the largest distance value observed for that
// municipality, then let the result take over the name "distance"
egen distmax = max(distance), by(ao_gem_imp)
drop distance
rename distmax distance

*** calculate outcome variables for wage growth
sort ao_gem_imp year female imp_edu age_3

// NOTE on storage types: egen and generate create float variables by default.
// The 1990 baselines and the difference are therefore requested as double, so
// that the collapsed values are not rounded to float precision before (or
// after) differencing.

** group-specific native employment in 1990, to be used as weight in regressions
// here: by 2 sex, 3 education & 3 age
// cond() exposes emp_nat only in the 1990 row; max() then spreads that value to
// every year of the cell (missing if the cell has no 1990 row)
bysort ao_gem_imp female imp_edu age_3: egen double emp_nat_90 = max(cond(year == 1990, emp_nat, .))

** group-specific native wage in 1990, the baseline for the growth measure
// here: by 2 sex, 3 education & 3 age
bysort ao_gem_imp female imp_edu age_3: egen double y_nat_90 = max(cond(year == 1990, y_nat, .))

** native wage change in year X relative to 1990
gen double g_y_nat = y_nat - y_nat_90
// base-year rows are set to exactly 0, including rows where y_nat is missing in 1990
replace g_y_nat = 0  if year == 1990

*** write out the sample for the pseudo-panel wage estimation
local outfile "data/wage_region_sex2_edu3_age3.dta"

// shrink storage types where possible and put rows in cell order
compress
sort ao_gem_imp  year  female  imp_edu  age_3

// replace dataset-level metadata: remove existing dataset notes, set the label
notes drop _dta
label data "pseudo-panel wage estimation: 2 sex, 3 education & 3 age groups per municipality"

save "`outfile'", replace
